###########
##R SETUP##
###########
rm(list=ls())
require(foreign)
require(dplyr)
require(gmodels)
require(ggplot2)
require(plm)
require(lmtest)
require(stargazer)
require(ggmap)
require(maps)
require(rgdal)
require(multiwayvcov)
require(lme4)
require(car)
require(CBPS)

load("years.Rda")
india <- readOGR(dsn = "Census_2011")


#############
##DATA PREP##
#############
### Scaling, transforming and dropping missing ###
#Population-household control variable
analysis$log_households <- log(analysis$total_households)

#Distance to town control variable
analysis$c01_2001_dist_town <- as.numeric(as.character(analysis$c01_2001_dist_town))

#use 2001 for missing 2011 data
analysis <- transform(analysis, distance = ifelse(!is.na(c11_2011_near_town_distance), c11_2011_near_town_distance, c01_2001_dist_town))
analysis$log_distance <- log(analysis$distance+0.1)

#rescale percent electrified
analysis$elecrate <- analysis$percent_elec*100

#rename years
analysis$years <- analysis$v_q21_1_elec_year

#Save df with missingness for 
years_missing <- analysis
years_missing$Missingness <- as.numeric(is.na(years_missing$years))

#Drop all missing for years
analysis <- analysis[!(is.na(analysis$years)),]

### MAPPING DATA PREP ###
district_year <- group_by(analysis, v_q3_district_name) %>%
  summarise(mean(v_q21_1_elec_year, na.rm=TRUE))
names(district_year)[2] <- "district_year"

india_df1 <- fortify(india, region = "DISTRICT")
india_df2 <- fortify(india, region="ST_NM")
india_df1$id <- toupper(india_df1$id)

#change names to match 
district_year$names <- dplyr::recode(district_year$v_q3_district_name, BIJNOUR="BIJNOR")
district_year$names <- dplyr::recode(district_year$names, BULANDSHAHAR="BULANDSHAHR")
district_year$names <- dplyr::recode(district_year$names, "KHANDWA (EAST NIMAR)"="BIJNOR")
district_year$names <- dplyr::recode(district_year$names, "NORTH TWENTY FOUR PARGANAS"="NORTH 24 PARGANAS")
district_year$names <- dplyr::recode(district_year$names, "PASCHIM MEDNIPUR"="PASHCHIM MEDINIPUR")
district_year$names <- dplyr::recode(district_year$names, "PURWA CHAMPARAN"="PURBA CHAMPARAN")
district_year$names <- dplyr::recode(district_year$names, SIDDHARTHNAGAR="SIDDHARTH NAGAR")

states <- unique(analysis$v_q1_state_name)

india_df2$states <- toupper(india_df2$id)

india_st <- india_df2[india_df2$states %in% states,]

#eliminate the wrong Pratapgarh
india_df1 <- india_df1[!(india_df1$id=="PRATAPGARH" & india_df1$long<80),]

### PROPENSITY SCORE DATA PREP ###
#Untransformed balance
vars<- c("outages", "dailyhours", "elecrate", "years", "log_distance", "log_households", "scheduled", "v_q1_state_code", "v_q3_district_code")
analysis_full <- analysis[vars]
analysis_full <- analysis_full[complete.cases(analysis_full),]
prop.score <- CBPS(years ~ log_distance + log_households + scheduled + as.factor(v_q1_state_code), data = analysis_full)
balance1 <- balance(prop.score)

#Transform years variable
lambda <- powerTransform(analysis_full$years)
analysis_full$tran.years <- bcPower(analysis_full$years, lambda= lambda$lambda)

#Transformed balance
prop.score2 <- CBPS(tran.years ~ log_distance + log_households + scheduled + as.factor(v_q1_state_code), data = analysis_full)
balance2 <- balance(prop.score2)

#Prepare balance table data
Balanced <- balance1$balanced
Unweighted <- balance1$unweighted
Balanced_tran <- balance2$balanced
Unweighted_tran <- balance2$unweighted
PCC <- cbind(Unweighted, Balanced, Unweighted_tran, Balanced_tran)
PCC <- PCC[1:3, 1:4]
colnames(PCC) <- c("Unweighted", "Balanced", "Unweighted (Box-Cox)", "Balanced (Box-Cox)")
row.names(PCC) <- c("Distance to Town (log)", "Number of Households (log)", "Percent Scheduled Caste/Tribe")

###########
##FIGURES##
###########

### Figure 1 ###
map <- ggplot() + 
  geom_path(data = india_df1, 
            aes(x = long, y = lat, group = group),
            color = 'gray', size = .1) + 
  geom_path(data = india_st, 
            aes(x = long, y = lat, group = group),
            color = 'black', size = .2) +
  geom_map(data = district_year, aes(map_id = names, fill = district_year), 
           map = india_df1) + 
  expand_limits(x = india_df1$long, y = india_df1$lat)+
  xlab("Longitude")+
  ylab("Latitude")+
  labs(fill = "Years Since Electrification")

print(map)


### Figure 2 ###
hist(analysis$elecrate, main="Histogram of Electrification Rate", xlab="Electrification Rate")

hist(analysis$outages, main="Histogram of Monthly Outages", xlab="Monthly Outages")

hist(analysis$dailyhours, main = "Histogram of Daily Hours of Electricity", xlab="Daily Hours of Electricity")

### Figure 3 ###
hist(analysis$v_q21_1_elec_year, main="Histogram of Years Since Electrification", xlab = "Years Since Electrification")

### Figure 4 ###
ggplot(analysis, aes(v_q21_1_elec_year, elecrate)) + geom_point() + geom_smooth(method="lm", formula = y ~ x)+xlab("Years Since Electrification")+ylab("Electrification Rate")

ggplot(analysis, aes(v_q21_1_elec_year, dailyhours)) + geom_point() + geom_smooth(method="lm", formula = y ~ x)+xlab("Years Since Electrification")+ylab("Daily Hours of Electricity")

ggplot(analysis, aes(v_q21_1_elec_year, outages)) + geom_point() + geom_smooth(method="lm", formula = y ~ x)+xlab("Years Since Electrification")+ylab("Monthly Outages")

### Figure 5 ###
#create the periods
analysis$years_cat <- cut(analysis$years, c(0, 10, 20, 30, 40, 50, 60))

#plot it
ggplot(analysis, aes(years_cat, elecrate)) + geom_boxplot()+xlab("Years Since Electrification")+ylab("Electrification Rate")



ggplot(analysis, aes(years_cat, dailyhours)) + geom_boxplot()+xlab("Years Since Electrification")+ylab("Daily Hours of Electricity")


ggplot(analysis, aes(years_cat, outages)) + geom_boxplot()+xlab("Years Since Electrification")+ylab("Monthly Outages")

### Figure 6 ###
hist(analysis_full$tran.years, main="Histogram of Transformed Years Since Electrification", xlab = "Transformed Years Since Electrification (Box-Cox)")

##########
##TABLES##
##########

### Table 1 ###
stargazer(analysis[c("years", "outages", "dailyhours", "elecrate", "c01_2001_dist_town", "log_distance", "total_households", "log_households", "scheduled")], type="latex", float = F, covariate.labels = c("Years Since Elec.", "Monthly Outages", "Daily Hours of Elec.", "Village Electrification Rate", "Distance to Town", "Distance to Town (log)", "Number of Households", "Number of Households (log)", "Percent Scheduled Caste/Tribe"))

### Table 2 ###
#Model 1: year, no FE – full controls
M1 <-  glm(Missingness ~ log_distance + log_households + scheduled, family=binomial(link="logit"), data=years_missing)
vcovM1 <-cluster.vcov(M1, as.factor(years_missing$v_q3_district_code)) 
seM1 <- coeftest(M1, vcovM1)

#Model 2: year, state FE – full controls
M2 <- glm(Missingness ~ log_distance + log_households + scheduled + as.factor(v_q1_state_code), family=binomial(link="logit"), data=years_missing)
vcovM2 <-cluster.vcov(M2, as.factor(years_missing$v_q3_district_code)) 
seM2 <- coeftest(M2, vcovM2)

#Model 3: year, dist FE – full controls
M3 <- glm(Missingness ~ log_distance + log_households + scheduled + as.factor(v_q3_district_code), family=binomial(link="logit"), data=years_missing)
vcovM3 <-cluster.vcov(M3, as.factor(years_missing$v_q3_district_code)) 
seM3 <- coeftest(M3, vcovM3)
#lists for stargazer
fit.list1 <- list(M1, M2, M3)
se.list1 <- list(seM1[,2], seM2[,2], seM3[,2])
omit.stats <- c("rsq","ser","f")
cov.list1 <- c("Distance to Town (log)", "Number of Households (log)", "Percent Scheduled Caste/Tribe")


stargazer(title = "Covariates Predicting Missingness of Years Since Electrification Variable",
          se = se.list1,
          header = F,
          fit.list1,
          covariate.labels = cov.list1,
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit=c("as.factor"),
          dep.var.labels = "Missingness of Years Since Electrification Variable",
          notes = "Standard errors clustered at district level.",
          add.lines = list(c("State FE?","No","Yes","No"),
                           c("District FE?","No","No","Yes")))


### Table 3 ###
stargazer(PCC, title = "Pearson Correlation Coefficients for Balanced and Unweighted Covariates", float=F)

### Table 4 ###
#model 1: year, no FE
EM1 <- lm(elecrate ~ years, data=analysis)
vcovEM1 <-cluster.vcov(EM1, as.factor(analysis$v_q3_district_code)) 
seEM1 <- coeftest(EM1, vcovEM1)

#model 2: year, state FE
EM2 <- lm(elecrate ~ years + as.factor(v_q1_state_code), data=analysis)
vcovEM2 <-cluster.vcov(EM2, as.factor(analysis$v_q3_district_code)) 
seEM2 <- coeftest(EM2, vcovEM2)

#model 3: year, dist FE
EM3 <- lm(elecrate ~ years + as.factor(v_q3_district_code), data=analysis)
vcovEM3 <-cluster.vcov(EM3, as.factor(analysis$v_q3_district_code)) 
seEM3 <- coeftest(EM3, vcovEM3)

#model 4: year, no FE, controls
EM4 <-lm(elecrate ~ years + log_distance + log_households + scheduled, data=analysis)
vcovEM4 <-cluster.vcov(EM4, as.factor(analysis$v_q3_district_code)) 
seEM4 <- coeftest(EM4, vcovEM4)

#model 5: year, state FE, controls
EM5 <- lm(elecrate ~ years + log_distance + log_households + scheduled + as.factor(v_q1_state_code), data=analysis)
vcovEM5 <-cluster.vcov(EM5, as.factor(analysis$v_q3_district_code)) 
seEM5 <- coeftest(EM5, vcovEM5)

#model 6: year, dist FE, controls
EM6 <- lm(elecrate ~ years + log_distance + log_households + scheduled + as.factor(v_q3_district_code), data=analysis)
vcovEM6 <-cluster.vcov(EM6, as.factor(analysis$v_q3_district_code)) 
seEM6 <- coeftest(EM6, vcovEM6)

#lists for stargazer
fit.listE <- list(EM1, EM2, EM3, EM4, EM5, EM6)
se.listE <- list(seEM1[,2], seEM2[,2], seEM3[,2], seEM4[,2], seEM5[,2], seEM6[,2])
omit.stats <- c("rsq","ser","f")
cov.list <- c("Years Since Elec.", "Distance to Town (log)", "Number of Households (log)", "Percent Scheduled Caste/Tribe")

#create the table
stargazer(title = "Years Since Electrification and Electrification Rate",
          se = se.listE,
          header = F,
          fit.listE,
          covariate.labels = cov.list,
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit=c("as.factor"),
          dep.var.labels = "Village Electrification Rate",
          notes = "Standard errors clustered at district level.",
          add.lines = list(c("State FE?","No","Yes","No","No","Yes","No"),
                           c("District FE?","No","No","Yes","No","No","Yes")))


### Table 5 ###
#Model 1: year, no FE
HM1 <- lm(dailyhours ~ years, data=analysis)
vcovHM1 <-cluster.vcov(HM1, as.factor(analysis$v_q3_district_code)) 
seHM1 <- coeftest(HM1, vcovHM1)

#model 2: year, state FE
HM2 <- lm(dailyhours ~ years + as.factor(v_q1_state_code), data=analysis)
vcovHM2 <-cluster.vcov(HM2, as.factor(analysis$v_q3_district_code)) 
seHM2 <- coeftest(HM2, vcovHM2)

#model 3: year, dist FE
HM3 <- lm(dailyhours ~ years + as.factor(v_q3_district_code), data=analysis)
vcovHM3 <-cluster.vcov(HM3, as.factor(analysis$v_q3_district_code)) 
seHM3 <- coeftest(HM3, vcovHM3)

#model 4: year, no FE, controls
HM4 <-lm(dailyhours ~ years + log_distance + log_households + scheduled, data=analysis)
vcovHM4 <-cluster.vcov(HM4, as.factor(analysis$v_q3_district_code)) 
seHM4 <- coeftest(HM4, vcovHM4)

#model 5: year, state FE, controls
HM5 <- lm(dailyhours ~ years + log_distance + log_households + scheduled + as.factor(v_q1_state_code), data=analysis)
vcovHM5 <-cluster.vcov(HM5, as.factor(analysis$v_q3_district_code)) 
seHM5 <- coeftest(HM5, vcovHM5)

#model 6: year, dist FE, controls
HM6 <- lm(dailyhours ~ years + log_distance + log_households + scheduled + as.factor(v_q3_district_code), data=analysis)
vcovHM6 <-cluster.vcov(HM6, as.factor(analysis$v_q3_district_code)) 
seHM6 <- coeftest(HM6, vcovHM6)

#lists for stargazer
fit.listH <- list(HM1, HM2, HM3, HM4, HM5, HM6)
se.listH <- list(seHM1[,2], seHM2[,2], seHM3[,2], seHM4[,2], seHM5[,2], seHM6[,2])
omit.stats <- c("rsq","ser","f")

#create the table
stargazer(title = "Years Since Electrification and Daily Hours",
          se = se.listH,
          header = F,
          fit.listH,
          covariate.labels = cov.list,
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit=c("as.factor"),
          dep.var.labels = "Daily Hours of Electricity",
          notes = "Standard errors clustered at district level.",
          add.lines = list(c("State FE?","No","Yes","No","No","Yes","No"),
                           c("District FE?","No","No","Yes","No","No","Yes")))

### Table 6 ###
#Model 1: year, no FE
OM1 <- lm(outages ~ years, data=analysis)
vcovOM1 <-cluster.vcov(OM1, as.factor(analysis$v_q3_district_code)) 
seOM1 <- coeftest(OM1, vcovOM1)

#model 2: year, state FE
OM2 <- lm(outages ~ years + as.factor(v_q1_state_code), data=analysis)
vcovOM2 <-cluster.vcov(OM2, as.factor(analysis$v_q3_district_code)) 
seOM2 <- coeftest(OM2, vcovOM2)

#model 3: year, dist FE
OM3 <- lm(outages ~ years + as.factor(v_q3_district_code), data=analysis)
vcovOM3 <-cluster.vcov(OM3, as.factor(analysis$v_q3_district_code)) 
seOM3 <- coeftest(OM3, vcovOM3)

#model 4: year, no FE, controls
OM4 <-lm(outages ~ years + log_distance + log_households + scheduled, data=analysis)
vcovOM4 <-cluster.vcov(OM4, as.factor(analysis$v_q3_district_code)) 
seOM4 <- coeftest(OM4, vcovOM4)

#model 5: year, state FE, controls
OM5 <- lm(outages ~ years + log_distance + log_households + scheduled + as.factor(v_q1_state_code), data=analysis)
vcovOM5 <-cluster.vcov(OM5, as.factor(analysis$v_q3_district_code)) 
seOM5 <- coeftest(OM5, vcovOM5)

#model 6: year, dist FE, controls
OM6 <- lm(outages ~ years + log_distance + log_households + scheduled + as.factor(v_q3_district_code), data=analysis)
vcovOM6 <-cluster.vcov(OM6, as.factor(analysis$v_q3_district_code)) 
seOM6 <- coeftest(OM6, vcovOM6)

#lists for stargazer
fit.listO <- list(OM1, OM2, OM3, OM4, OM5, OM6)
se.listO <- list(seOM1[,2], seOM2[,2], seOM3[,2], seOM4[,2], seOM5[,2], seOM6[,2])
omit.stats <- c("rsq","ser","f")

#create the table
stargazer(title = "Years Since Electrification and Monthly Outages",
          se = se.listO,
          header = F,
          fit.listO,
          covariate.labels = cov.list,
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit=c("as.factor"),
          dep.var.labels = "Monthly Outages",
          notes = "Standard errors clustered at district level.",
          add.lines = list(c("State FE?","No","Yes","No","No","Yes","No"),
                           c("District FE?","No","No","Yes","No","No","Yes")))

### Table 7 ###
#################
##Results: Rate##
#################

#model 1: year, no FE
pEM1t <- lm(elecrate ~ tran.years, weights = prop.score2$weights, data=analysis_full)
vcovpEM1t <-cluster.vcov(pEM1t, as.factor(analysis_full$v_q3_district_code)) 
sepEM1t <- coeftest(pEM1t, vcovpEM1t)
#model 2: year, state FE
pEM2t <- lm(elecrate ~ tran.years + as.factor(v_q1_state_code), weights = prop.score2$weights, data=analysis_full)
vcovpEM2t <-cluster.vcov(pEM2t, as.factor(analysis_full$v_q3_district_code)) 
sepEM2t <- coeftest(pEM2t, vcovpEM2t)

#model 3: year, dist FE
pEM3t <- lm(elecrate ~ tran.years + as.factor(v_q3_district_code), weights = prop.score2$weights, data=analysis_full)
vcovpEM3t <-cluster.vcov(pEM3t, as.factor(analysis_full$v_q3_district_code)) 
sepEM3t <- coeftest(pEM3t, vcovpEM3t)

#model 4: year, no FE, controls
pEM4t <-lm(elecrate ~ tran.years + log_distance + log_households + scheduled, weights = prop.score2$weights, data=analysis_full)
vcovpEM4t <-cluster.vcov(pEM4t, as.factor(analysis_full$v_q3_district_code)) 
sepEM4t <- coeftest(pEM4t, vcovpEM4t)

#model 5: year, state FE, controls
pEM5t <- lm(elecrate ~ tran.years + log_distance + log_households + scheduled + as.factor(v_q1_state_code), weights = prop.score2$weights, data=analysis_full)
vcovpEM5t <-cluster.vcov(pEM5t, as.factor(analysis_full$v_q3_district_code)) 
sepEM5t <- coeftest(pEM5t, vcovpEM5t)

#model 6: year, dist FE, controls
pEM6t <- lm(elecrate ~ tran.years + log_distance + log_households + scheduled + as.factor(v_q3_district_code), weights = prop.score2$weights, data=analysis_full)
vcovpEM6t <-cluster.vcov(pEM6t, as.factor(analysis_full$v_q3_district_code)) 
sepEM6t <- coeftest(pEM6t, vcovpEM6t)


#lists for stargazer
fit.listpEt <- list(pEM1t, pEM2t, pEM3t, pEM4t, pEM5t, pEM6t)
se.listpEt <- list(sepEM1t[,2], sepEM2t[,2], sepEM3t[,2], sepEM4t[,2], sepEM5t[,2], sepEM6t[,2])
omit.stats <- c("rsq","ser","f")
cov.list <- c("Years Since Elec.", "Distance to Town (log)", "Number of Households (log)", "Percent Scheduled Caste/Tribe")
#create the table
stargazer(title = "Propensity Score Weighted Years Since Electrification and Electrification Rate",
          se = se.listpEt,
          header = F,
          fit.listpEt,
          covariate.labels = cov.list,
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit=c("as.factor", "Constant"),
          table.layout = "-ld#-t-",
          dep.var.labels = "Village Electrification Rate")

##################
##Results: Hours##
##################


#Model 1: year, no FE
pHM1t <- lm(dailyhours ~ tran.years, weights = prop.score2$weights, data=analysis_full)
vcovpHM1t <-cluster.vcov(pHM1t, as.factor(analysis_full$v_q3_district_code)) 
sepHM1t <- coeftest(pHM1t, vcovpHM1t)
#model 2: year, state FE
pHM2t <- lm(dailyhours ~ tran.years + as.factor(v_q1_state_code), weights = prop.score2$weights, data=analysis_full)
vcovpHM2t <-cluster.vcov(pHM2t, as.factor(analysis_full$v_q3_district_code)) 
sepHM2t <- coeftest(pHM2t, vcovpHM2t)

#model 3: year, dist FE
pHM3t <- lm(dailyhours ~ tran.years + as.factor(v_q3_district_code), weights = prop.score2$weights, data=analysis_full)
vcovpHM3t <-cluster.vcov(pHM3t, as.factor(analysis_full$v_q3_district_code)) 
sepHM3t <- coeftest(pHM3t, vcovpHM3t)

#model 4: year, no FE, controls
pHM4t <-lm(dailyhours ~ tran.years + log_distance + log_households + scheduled, weights = prop.score2$weights, data=analysis_full)
vcovpHM4t <-cluster.vcov(pHM4t, as.factor(analysis_full$v_q3_district_code)) 
sepHM4t <- coeftest(pHM4t, vcovpHM4t)

#model 5: year, state FE, controls
pHM5t <- lm(dailyhours ~ tran.years + log_distance + log_households + scheduled + as.factor(v_q1_state_code), weights = prop.score2$weights, data=analysis_full)
vcovpHM5t <-cluster.vcov(pHM5t, as.factor(analysis_full$v_q3_district_code)) 
sepHM5t <- coeftest(pHM5t, vcovpHM5t)

#model 6: year, dist FE, controls
pHM6t <- lm(dailyhours ~ tran.years + log_distance + log_households + scheduled + as.factor(v_q3_district_code), weights = prop.score2$weights, data=analysis_full)
vcovpHM6t <-cluster.vcov(pHM6t, as.factor(analysis_full$v_q3_district_code)) 
sepHM6t <- coeftest(pHM6t, vcovpHM6t)

#lists for stargazer
fit.listpHt <- list(pHM1t, pHM2t, pHM3t, pHM4t, pHM5t, pHM6t)
se.listpHt <- list(sepHM1t[,2], sepHM2t[,2], sepHM3t[,2], sepHM4t[,2], sepHM5t[,2], sepHM6t[,2])
omit.stats <- c("rsq","ser","f")

#create the table
stargazer(title = "Propensity Score Weighted Years Since Electrification and Daily Hours",
          se = se.listpHt,
          header = F,
          fit.listpHt,
          covariate.labels = cov.list,
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          dep.var.labels = "Daily Hours of Electricity",
          omit=c("as.factor", "Constant"),
          table.layout = "-ld#-t-")


####################
##Results: Outages##
####################


#Model 1: year, no FE
pOM1t <- lm(outages ~ tran.years, weights = prop.score2$weights, data=analysis_full)
vcovpOM1t <-cluster.vcov(pOM1t, as.factor(analysis_full$v_q3_district_code)) 
sepOM1t <- coeftest(pOM1t, vcovpOM1t)
#model 2: year, state FE
pOM2t <- lm(outages ~ tran.years + as.factor(v_q1_state_code), weights = prop.score2$weights, data=analysis_full)
vcovpOM2t <-cluster.vcov(pOM2t, as.factor(analysis_full$v_q3_district_code)) 
sepOM2t <- coeftest(pOM2t, vcovpOM2t)

#model 3: year, dist FE
pOM3t <- lm(outages ~ tran.years + as.factor(v_q3_district_code), weights = prop.score2$weights, data=analysis_full)
vcovpOM3t <-cluster.vcov(pOM3t, as.factor(analysis_full$v_q3_district_code)) 
sepOM3t <- coeftest(pOM3t, vcovpOM3t)

#model 4: year, no FE, controls
pOM4t <-lm(outages ~ tran.years + log_distance + log_households + scheduled, weights = prop.score2$weights, data=analysis_full)
vcovpOM4t <-cluster.vcov(pOM4t, as.factor(analysis_full$v_q3_district_code)) 
sepOM4t <- coeftest(pOM4t, vcovpOM4t)

#model 5: year, state FE, controls
pOM5t <- lm(outages ~ tran.years + log_distance + log_households + scheduled + as.factor(v_q1_state_code), weights = prop.score2$weights, data=analysis_full)
vcovpOM5t <-cluster.vcov(pOM5t, as.factor(analysis_full$v_q3_district_code)) 
sepOM5t <- coeftest(pOM5t, vcovpOM5t)

#model 6: year, dist FE, controls
pOM6t <- lm(outages ~ tran.years + log_distance + log_households + scheduled + as.factor(v_q3_district_code), weights = prop.score2$weights, data=analysis_full)
vcovpOM6t <-cluster.vcov(pOM6t, as.factor(analysis_full$v_q3_district_code)) 
sepOM6t <- coeftest(pOM6t, vcovpOM6t)

#lists for stargazer
fit.listpOt <- list(pOM1t, pOM2t, pOM3t, pOM4t, pOM5t, pOM6t)
se.listpOt <- list(sepOM1t[,2], sepOM2t[,2], sepOM3t[,2], sepOM4t[,2], sepOM5t[,2], sepOM6t[,2])
omit.stats <- c("rsq","ser","f", "adj.rsq")

#create the table
stargazer(title = "Propensity Score Weighted Years Since Electrification and Monthly Outages",
          se = se.listpOt,
          header = F,
          fit.listpOt,
          covariate.labels = cov.list,
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit=c("as.factor", "Constant"),
          dep.var.labels = "Monthly Outages",
          notes = "Standard errors clustered at district level.",
          add.lines = list(c("State FE?","No","Yes","No","No","Yes","No"),
                           c("District FE?","No","No","Yes","No","No","Yes")))

############
##APPENDIX##
############
### Table A1 ###
#################
##Results: Rate##
#################

#model 1: year, no FE
pEM1 <- lm(elecrate ~ years, weights = prop.score$weights, data=analysis_full)
vcovpEM1 <-cluster.vcov(pEM1, as.factor(analysis_full$v_q3_district_code)) 
sepEM1 <- coeftest(pEM1, vcovpEM1)
#model 2: year, state FE
pEM2 <- lm(elecrate ~ years + as.factor(v_q1_state_code), weights = prop.score$weights, data=analysis_full)
vcovpEM2 <-cluster.vcov(pEM2, as.factor(analysis_full$v_q3_district_code)) 
sepEM2 <- coeftest(pEM2, vcovpEM2)

#model 3: year, dist FE
pEM3 <- lm(elecrate ~ years + as.factor(v_q3_district_code), weights = prop.score$weights, data=analysis_full)
vcovpEM3 <-cluster.vcov(pEM3, as.factor(analysis_full$v_q3_district_code)) 
sepEM3 <- coeftest(pEM3, vcovpEM3)

#model 4: year, no FE, controls
pEM4 <-lm(elecrate ~ years + log_distance + log_households + scheduled, weights = prop.score$weights, data=analysis_full)
vcovpEM4 <-cluster.vcov(pEM4, as.factor(analysis_full$v_q3_district_code)) 
sepEM4 <- coeftest(pEM4, vcovpEM4)

#model 5: year, state FE, controls
pEM5 <- lm(elecrate ~ years + log_distance + log_households + scheduled + as.factor(v_q1_state_code), weights = prop.score$weights, data=analysis_full)
vcovpEM5 <-cluster.vcov(pEM5, as.factor(analysis_full$v_q3_district_code)) 
sepEM5 <- coeftest(pEM5, vcovpEM5)

#model 6: year, dist FE, controls
pEM6 <- lm(elecrate ~ years + log_distance + log_households + scheduled + as.factor(v_q3_district_code), weights = prop.score$weights, data=analysis_full)
vcovpEM6 <-cluster.vcov(pEM6, as.factor(analysis_full$v_q3_district_code)) 
sepEM6 <- coeftest(pEM6, vcovpEM6)

#lists for stargazer
fit.listpE <- list(pEM1, pEM2, pEM3, pEM4, pEM5, pEM6)
se.listpE <- list(sepEM1[,2], sepEM2[,2], sepEM3[,2], sepEM4[,2], sepEM5[,2], sepEM6[,2])
omit.stats <- c("rsq","ser","f")
cov.list <- c("Years Since Elec.", "Distance to Town (log)", "Number of Households (log)", "Percent Scheduled Caste/Tribe")
#create the table
stargazer(title = "Propensity Score Weighted Years Since Electrification and Electrification Rate",
          se = se.listpE,
          header = F,
          fit.listpE,
          covariate.labels = cov.list,
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit=c("as.factor", "Constant"),
          table.layout = "-ld#-t-",
          dep.var.labels = "Village Electrification Rate")


##################
##Results: Hours##
##################


#Model 1: year, no FE
pHM1 <- lm(dailyhours ~ years, weights = prop.score$weights, data=analysis_full)
vcovpHM1 <-cluster.vcov(pHM1, as.factor(analysis_full$v_q3_district_code)) 
sepHM1 <- coeftest(pHM1, vcovpHM1)
#model 2: year, state FE
pHM2 <- lm(dailyhours ~ years + as.factor(v_q1_state_code), weights = prop.score$weights, data=analysis_full)
vcovpHM2 <-cluster.vcov(pHM2, as.factor(analysis_full$v_q3_district_code)) 
sepHM2 <- coeftest(pHM2, vcovpHM2)

#model 3: year, dist FE
pHM3 <- lm(dailyhours ~ years + as.factor(v_q3_district_code), weights = prop.score$weights, data=analysis_full)
vcovpHM3 <-cluster.vcov(pHM3, as.factor(analysis_full$v_q3_district_code)) 
sepHM3 <- coeftest(HM3, vcovpHM3)

#model 4: year, no FE, controls
pHM4 <-lm(dailyhours ~ years + log_distance + log_households + scheduled, weights = prop.score$weights, data=analysis_full)
vcovpHM4 <-cluster.vcov(pHM4, as.factor(analysis_full$v_q3_district_code)) 
sepHM4 <- coeftest(pHM4, vcovpHM4)

#model 5: year, state FE, controls
pHM5 <- lm(dailyhours ~ years + log_distance + log_households + scheduled + as.factor(v_q1_state_code), weights = prop.score$weights, data=analysis_full)
vcovpHM5 <-cluster.vcov(pHM5, as.factor(analysis_full$v_q3_district_code)) 
sepHM5 <- coeftest(pHM5, vcovpHM5)

#model 6: year, dist FE, controls
pHM6 <- lm(dailyhours ~ years + log_distance + log_households + scheduled + as.factor(v_q3_district_code), weights = prop.score$weights, data=analysis_full)
vcovpHM6 <-cluster.vcov(pHM6, as.factor(analysis_full$v_q3_district_code)) 
sepHM6 <- coeftest(pHM6, vcovpHM6)

#lists for stargazer
fit.listpH <- list(pHM1, pHM2, pHM3, pHM4, pHM5, pHM6)
se.listpH <- list(sepHM1[,2], sepHM2[,2], sepHM3[,2], sepHM4[,2], sepHM5[,2], sepHM6[,2])
omit.stats <- c("rsq","ser","f")

#create the table
stargazer(title = "Propensity Score Weighted Years Since Electrification and Daily Hours",
          se = se.listpH,
          header = F,
          fit.listpH,
          covariate.labels = cov.list,
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          dep.var.labels = "Daily Hours of Electricity",
          omit=c("as.factor", "Constant"),
          table.layout = "-ld#-t-")


####################
##Results: Outages##
####################


#Model 1: year, no FE
pOM1 <- lm(outages ~ years, weights = prop.score$weights, data=analysis_full)
vcovpOM1 <-cluster.vcov(pOM1, as.factor(analysis_full$v_q3_district_code)) 
sepOM1 <- coeftest(pOM1, vcovpOM1)
#model 2: year, state FE
pOM2 <- lm(outages ~ years + as.factor(v_q1_state_code), weights = prop.score$weights, data=analysis_full)
vcovpOM2 <-cluster.vcov(pOM2, as.factor(analysis_full$v_q3_district_code)) 
sepOM2 <- coeftest(pOM2, vcovpOM2)

#model 3: year, dist FE
pOM3 <- lm(outages ~ years + as.factor(v_q3_district_code), weights = prop.score$weights, data=analysis_full)
vcovpOM3 <-cluster.vcov(pOM3, as.factor(analysis_full$v_q3_district_code)) 
sepOM3 <- coeftest(pOM3, vcovpOM3)

#model 4: year, no FE, controls
pOM4 <-lm(outages ~ years + log_distance + log_households + scheduled, weights = prop.score$weights, data=analysis_full)
vcovpOM4 <-cluster.vcov(pOM4, as.factor(analysis_full$v_q3_district_code)) 
sepOM4 <- coeftest(pOM4, vcovpOM4)

#model 5: year, state FE, controls
pOM5 <- lm(outages ~ years + log_distance + log_households + scheduled + as.factor(v_q1_state_code), weights = prop.score$weights, data=analysis_full)
vcovpOM5 <-cluster.vcov(pOM5, as.factor(analysis_full$v_q3_district_code)) 
sepOM5 <- coeftest(pOM5, vcovpOM5)

#model 6: year, dist FE, controls
pOM6 <- lm(outages ~ years + log_distance + log_households + scheduled + as.factor(v_q3_district_code), weights = prop.score$weights, data=analysis_full)
vcovpOM6 <-cluster.vcov(pOM6, as.factor(analysis_full$v_q3_district_code)) 
sepOM6 <- coeftest(pOM6, vcovpOM6)

#lists for stargazer
fit.listpO <- list(pOM1, pOM2, pOM3, pOM4, pOM5, pOM6)
se.listpO <- list(sepOM1[,2], sepOM2[,2], sepOM3[,2], sepOM4[,2], sepOM5[,2], sepOM6[,2])
omit.stats <- c("rsq","ser","f", "adj.rsq")

#create the table
stargazer(title = "Propensity Score Weighted Years Since Electrification and Monthly Outages",
          se = se.listpO,
          header = F,
          fit.listpO,
          covariate.labels = cov.list,
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit=c("as.factor", "Constant"),
          dep.var.labels = "Monthly Outages",
          notes = "Standard errors clustered at district level.",
          add.lines = list(c("State FE?","No","Yes","No","No","Yes","No"),
                           c("District FE?","No","No","Yes","No","No","Yes")))

### Table A2 ###


#model EM8: year, RE, controls
EM8 <- lmer(elecrate ~ years + log_distance + log_households + scheduled + (1|v_q1_state_code), data=analysis)

#model HM8: year, RE, controls
HM8 <- lmer(dailyhours ~ years + log_distance + log_households + scheduled + (1|v_q1_state_code), data=analysis)

#model OM8: year, RE, controls
OM8 <- lmer(outages ~ years + log_distance + log_households + scheduled + (1|v_q1_state_code), data=analysis)

fit.listRE <- list(EM8, HM8, OM8)
omit.stats <- c("rsq","ser","f")

stargazer(title = "Random Effects Estimation",
          header = F,
          fit.listRE,
          covariate.labels = cov.list,
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit=c("as.factor"),
          dep.var.labels = c("Electrification Rate", "Daily Hours", "Monthly Outages"),
          notes = "All models include random effects at the state level")

### Table A3 ###
#Interaction models 
int1 <- lm(elecrate ~ years*log_distance + log_households + scheduled + as.factor(v_q3_district_code), data=analysis) #elecrate DV
vcovint1 <-cluster.vcov(int1, as.factor(analysis$v_q3_district_code)) 
seint1 <- coeftest(int1, vcovint1)

int2 <- lm(elecrate ~ years*log_households + log_distance + scheduled + as.factor(v_q3_district_code), data=analysis)
vcovint2 <-cluster.vcov(int2, as.factor(analysis$v_q3_district_code)) 
seint2 <- coeftest(int2, vcovint2)

int3 <- lm(elecrate ~ years*scheduled + log_distance + log_households + as.factor(v_q3_district_code), data=analysis)
vcovint3 <-cluster.vcov(int3, as.factor(analysis$v_q3_district_code)) 
seint3 <- coeftest(int3, vcovint3)


int4 <- lm(dailyhours ~ years*log_distance + log_households + scheduled + as.factor(v_q3_district_code), data=analysis) #dailyhours DV
vcovint4 <-cluster.vcov(int4, as.factor(analysis$v_q3_district_code)) 
seint4 <- coeftest(int4, vcovint4)

int5 <- lm(dailyhours ~ years*log_households + log_distance + scheduled + as.factor(v_q3_district_code), data=analysis)
vcovint5 <-cluster.vcov(int5, as.factor(analysis$v_q3_district_code)) 
seint5 <- coeftest(int5, vcovint5)

int6 <- lm(dailyhours ~ years*scheduled + log_distance + log_households + as.factor(v_q3_district_code), data=analysis)
vcovint6 <-cluster.vcov(int6, as.factor(analysis$v_q3_district_code)) 
seint6 <- coeftest(int6, vcovint6)


int7 <- lm(outages ~ years*log_distance + log_households + scheduled + as.factor(v_q3_district_code), data=analysis) #outages DV
vcovint7 <-cluster.vcov(int7, as.factor(analysis$v_q3_district_code)) 
seint7 <- coeftest(int7, vcovint7)

int8 <- lm(outages ~ years*log_households + log_distance + scheduled + as.factor(v_q3_district_code), data=analysis)
vcovint8 <-cluster.vcov(int8, as.factor(analysis$v_q3_district_code)) 
seint8 <- coeftest(int8, vcovint8)

int9 <- lm(outages ~ years*scheduled + log_distance + log_households + as.factor(v_q3_district_code), data=analysis)
vcovint9 <-cluster.vcov(int9, as.factor(analysis$v_q3_district_code)) 
seint9 <- coeftest(int9, vcovint9)

#interaction lists
fit.listint <- list(int1, int2, int3, int4, int5, int6, int7, int8, int9)
se.listint <- list(seint1[,2], seint2[,2], seint3[,2], seint4[,2], seint5[,2], seint6[,2], seint7[,2], seint8[,2], seint9[,2])
omit.stats <- c("rsq","ser","f")

#interaction table
stargazer(title = "Regression Models Including Interaction Terms",
          se = se.listint,
          header = F,
          fit.listint,
          covariate.labels = c("Years Since Elec.", "Distance to Town (log)", "Number of Households (log)", "Percent Scheduled Caste/Tribe", "Years:Distance (log)", "Years:Households (log)", "Years:Caste"),
          omit.stat = omit.stats,
          no.space = T,
          suppress.errors = F,float = F,
          model.names = F,
          omit=c("as.factor"),
          dep.var.labels = c("Electrification Rate", "Daily Hours of Electricity", "Monthly Outages"),
          notes = "Standard errors clustered at district level.",
          add.lines = list(c("District FE?","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes")))

